	 * Do not pause output with a --more-- prompt while the script runs.
	 set more off
	 
	 * Global directory1: not referenced by any command in the part of the script shown here.
	 * NOTE(review): presumably the folder holding the raw UK Biobank extract; confirm before removing.
	 global directory1 "/Users/lcarvalh/Documents/UKB - May 2020"	
	
	 * Global directory: folder that the use, merge and save commands in this script read from and write to.
	 * This is a machine-specific absolute path and must be edited before running elsewhere.
	 global directory "/Users/lcarvalh/Documents/GENETICS & POVERTY/FINAL DATA JPE Micro"
	
	////////////////////////////////////////
	// 									  //
	// 		HOSPITAL INPATIENT DATA 	  //
	// 									  //
	////////////////////////////////////////
	
	* Load only the hospital diagnosis columns (s_41270 and s_41271 families) and the person id.
	use s_41270_* s_41271_* n_eid using "$directory/Master.dta", clear
	
	* alcohol_abuse counts diagnosis slots holding an alcohol-related code (one increment per matching slot).
	gen alcohol_abuse = 0
		
	*** CODES BELOW COME FROM APPENDIX 1 OF "Local Alcohol Profiles for England 2017 user guide"
	*** https://fingertips.phe.org.uk/documents/LAPE_2017_User_Guide_071117.pdf
	
	*** AND FROM APPENDIX A of "Alcohol-related Hospital Statistics Scotland 2014-2015"
	*** https://www.isdscotland.org/Health-Topics/Drugs-and-Alcohol-Misuse/Publications/2015-10-13/2015-10-13-ARHS2014-15-Report.pdf?6892031432
	
	* Codes are compared as complete strings. Splitting a code into a letter and a number
	* (via destring) drops leading zeros, so unrelated codes such as F010 or K070 would be
	* read as F 10 and K 70 and wrongly counted. A three-character stem followed by [0-9]?
	* accepts the stem alone or with one further digit (for example F10 and F100 to F109).
	local icd10_england  "E244|F10[0-9]?|G312|G621|G721|I426|K292|K70[0-9]?|K852|K860|Q860|R780|X45[0-9]?|X4599|X65[0-9]?|X6598|Y15[0-9]?|Y90[0-9]?|Y91[0-9]?"
	
	* USED in SCOTLAND but NOT IN ENGLAND
	local icd10_scotland "E512|O354|P043|Y573|Z502|Z714|Z721"
	
	forvalues i = 0/225 {
	
		replace alcohol_abuse = alcohol_abuse + 1	if regexm(s_41270_0_`i', "^(`icd10_england'|`icd10_scotland')$")
		
		* The slot is no longer needed once it has been scanned.
		drop s_41270_0_`i'
		
	}
	
	*** ICD-9 for SCOTLAND
	
	*** CODES BELOW COME FROM APPENDIX A of "Alcohol-related Hospital Statistics Scotland 2014-2015"
	*** https://www.isdscotland.org/Health-Topics/Drugs-and-Alcohol-Misuse/Publications/2015-10-13/2015-10-13-ARHS2014-15-Report.pdf?6892031432
	
	* Codes are compared as complete strings. Comparing only the trailing digits as a number
	* ignores any letter prefix, so E9800, E9801 and E9809 would be counted as the numeric
	* codes 9800, 9801 and 9809; it also drops leading zeros. Anchoring the pattern at both
	* ends keeps the purely numeric codes and the E-prefixed codes apart.
	local icd9_numeric "2651|2910|2911|2912|2913|2915|2918|2919|3039|3050|3575|4255|5353|5710|5711|5712|5713|7598|7607|7903|9800|9801|9809"
	local icd9_ecodes  "E8600|E8601|E8602|E8609|E9473"
	
	forvalues i = 0/46 {
	
		replace alcohol_abuse = alcohol_abuse + 1	if regexm(s_41271_0_`i', "^(`icd9_numeric'|`icd9_ecodes')$")
		
		* The slot is no longer needed once it has been scanned.
		drop s_41271_0_`i'
		
		}	
	
		
	* Reduce the data to the person id and the diagnosis count, order by id,
	* and store the result as the Hospital_Inpatient file.
	keep n_eid alcohol_abuse
	sort n_eid
	save "$directory/Hospital_Inpatient", replace
	*/
	***************************************
	***************************************
	
	* Start again from the complete master file.
	use "$directory/Master.dta", clear
			
	***************************************
	*  Merging Hospital Inpatient Data 	  *
	***************************************

	* One-to-one match on the person id. Rows found only in the using file are dropped
	* (results 1 and 3 are kept) and no _merge variable is left behind.
	merge 1:1 n_eid using "$directory/Hospital_Inpatient", keep(1 3) nogenerate
	
	********************************************
	** Labor Force Status
	********************************************

	* working starts at 0 when the first n_6142 answer is a code from 1 to 7 or is -7,
	* and is missing otherwise; it becomes 1 when any of the seven answer slots holds code 1.
	gen working = 0 if inrange(n_6142_0_0, 1, 7) | n_6142_0_0 == -7
	forvalues slot = 0/6 {
		replace working = 1 if n_6142_0_`slot' == 1
	}
	
	***************
	** Occupation
	***************

	* SOC2000 is the number formed by the first four characters of n_132_0_0.
	* The two string helpers (str_n_132_0_0 and str_SOC2000) stay in the data.
	tostring n_132_0_0, generate(str_n_132_0_0)
	gen str_SOC2000 = substr(str_n_132_0_0, 1, 4)
	destring str_SOC2000, generate(SOC2000)
	
	* No occupation code for people flagged as not working, nor for a code of 0.
	replace SOC2000 = .	if working == 0 | SOC2000 == 0
	
	******************************
	*** Gender
	******************************

	* n_31_0_0 is carried forward under the name male (used as a merge key just below).
	rename n_31_0_0 male
	
	***************************************
	*** MERGING ASHE Data
	***************************************
	
	* Many-to-one lookup by occupation code and male; rows found only in the
	* using file are dropped and no _merge variable is kept.
	sort SOC2000 male
	merge m:1 SOC2000 male using "$directory/ASHE_Data_2009.dta", keep(1 3) nogenerate
	
	***************************************
	*** MERGING O*NET Data
	***************************************
	
	* Same kind of lookup, keyed on occupation code only.
	merge m:1 SOC2000 using "$directory/O*NET.dta", keep(1 3) nogenerate

	******************************
	*** Qualifications
	******************************
	
	* A respondent counts as having answered when the first n_6138 slot holds a code
	* from 1 to 6 or the value -7. Every indicator below starts as missing for anyone else.
	local answered "inrange(n_6138_0_0, 1, 6) | n_6138_0_0 == -7"
	
	* noqual: 1 when the first answer is -7, 0 when it is one of the codes 1 to 6.
	gen noqual = (n_6138_0_0 == -7)	if `answered'
	
	* Create the remaining indicators at 0 for respondents who answered.
	foreach qual in Olevel CSE CSE_Olevel_only Alevel NVQ professional college {
		gen `qual' = 0	if `answered'
	}
	
	* Scan the six answer slots once and switch on each indicator whose code appears.
	* CSE_Olevel_only is switched on by code 3 or code 4; despite its name the code
	* does not check that no other qualification is held.
	forvalues slot = 0/5 {
		replace college         = 1	if n_6138_0_`slot' == 1
		replace Alevel          = 1	if n_6138_0_`slot' == 2
		replace Olevel          = 1	if n_6138_0_`slot' == 3
		replace CSE             = 1	if n_6138_0_`slot' == 4
		replace NVQ             = 1	if n_6138_0_`slot' == 5
		replace professional    = 1	if n_6138_0_`slot' == 6
		replace CSE_Olevel_only = 1	if inlist(n_6138_0_`slot', 3, 4)
	}
	
	******************************
	*** Years of Schooling
	******************************	
		
	* temp: n_845_0_0 minus 5, filled only for NVQ holders whose n_845_0_0 is at least 12.
	* The variable is left in the data.
	gen temp = n_845_0_0 - 5	if NVQ == 1 & n_845_0_0 >= 12 & !missing(n_845_0_0) // NVQ or HND or HNC or equivalent
	
	* YoS is assigned from the lowest level upwards, so each later line overrides the earlier ones.
	gen YoS = .
	replace YoS = 7		if noqual == 1				// No Qualification
	replace YoS = 10	if Olevel == 1 | CSE == 1	// O-level or CSE
	replace YoS = 13	if Alevel == 1				// A-level
	replace YoS = 15	if professional == 1		// Other professional qualifications
	
	* NVQ: use temp when it is available and either YoS is still empty or temp exceeds it.
	replace YoS = temp	if NVQ == 1 & !missing(temp) & (missing(YoS) | temp > YoS)
	
	replace YoS = 20	if college == 1				// College
	
	* Assume CSE or O-level for anyone holding any qualification.
	gen CSE_Olevel = 1 - noqual
	
	* Assume an A-level for anyone with college, NVQ or a professional qualification.
	* This runs after YoS has been set, so it does not feed back into YoS.
	replace Alevel = 1	if professional == 1 | NVQ == 1 | college == 1
				
	******************************
	*** School leaving age
	******************************
	 
	* SLA: n_845_0_0 with negative codes set to missing, and fixed at 21 for college == 1.
	ren n_845_0_0 SLA
	replace SLA = . 	if SLA < 0
	replace SLA = 21 	if college == 1
		
	* compulsory = 1 for people who left at or below the threshold age for their birth cohort:
	* 15 when born before 1 Sep 1957, 16 when born on or after that date.
	* Stata treats a missing value as larger than any number, so a missing birth date would
	* satisfy the test for being born on or after the cutoff. The birth date is therefore
	* required to be non-missing, and compulsory stays missing when it is not.
	gen compulsory = 0	if SLA < . & ts_33_0_0 < .
		replace compulsory = 1 if SLA <= 15 & ts_33_0_0 <  td(01sep1957)
		replace compulsory = 1 if SLA <= 16 & ts_33_0_0 >= td(01sep1957) & ts_33_0_0 < .

	* edu16: stayed in school until at least 16 (missing when SLA is missing).
	gen edu16 = (SLA >= 16) if SLA < .
		
	********************************************
	** Average total household income before tax
	********************************************
	
	* Negative codes of n_738_0_0 are treated as missing.
	replace n_738_0_0 = .	if n_738_0_0 < 0
	
	* Each indicator is missing when n_738_0_0 is missing, and otherwise 1 or 0
	* according to the category, or range of categories, shown.
	gen Inc_less_18k  = (n_738_0_0 == 1)			if !missing(n_738_0_0)
	gen Inc_more_31k  = inrange(n_738_0_0, 3, 5)	if !missing(n_738_0_0)
	gen Inc_more_52k  = inrange(n_738_0_0, 4, 5)	if !missing(n_738_0_0)
	gen Inc_more_100k = (n_738_0_0 == 5)			if !missing(n_738_0_0)

	* Complement of the lowest category.
	gen Inc_more_18k = 1 - Inc_less_18k
		
	***************************************
	* 				Wages  				  *
	***************************************
		
	* NOTE(review): wages is not created in the visible code; presumably weekly pay
	* brought in by one of the merges above. Confirm the unit before relying on the factor 52.
	replace wages = 52 * wages
	replace wages = 0	if working == 0
			
	* The log is missing wherever wages is 0 or missing.
	gen lnwages = ln(wages)
			
	* cond_wages: same values, blanked for people flagged as not working.
	clonevar cond_wages = wages
	replace cond_wages  = .	if working == 0
		
	* The version with zeros for non-workers is kept under the name uncond_wages.
	rename wages uncond_wages
		
	**************************************
	** Own or rent accommodation lived in
	**************************************
	
	* Negative codes are treated as missing; codes 1 and 2 count as home ownership.
	replace n_680_0_0 = .	if n_680_0_0 < 0
	
	gen home_ownership = inlist(n_680_0_0, 1, 2)	if !missing(n_680_0_0)
	 
	***************************************
	*  			Merging Biomarkers 	  	  *
	***************************************
	
	* Biomarker columns are only renamed here; nothing is merged in this section.
	rename n_30740_0_0 N_glucose
	rename n_30730_0_0 N_GGT
	rename n_30850_0_0 testosterone
	
	***************************************
	*  			Accelerometer 	  		  *
	***************************************
	
	rename n_90087_0_0 N_accelerometer
	
	********************************************
	** Cognitive Performance
	********************************************
	
	* Reaction time, sign-flipped
	gen reaction_time = -n_20023_0_0
	
	* Pairs matching, sign-flipped
	gen visual_memory = -n_399_0_2
			
	* Prospective memory: 1 for code 1, 0 for codes 0 and 2, missing otherwise
	gen prospective_memory = (n_20018_0_0 == 1)	if inlist(n_20018_0_0, 0, 1, 2)
	
	* Fluid intelligence
	rename n_20016_0_0 fluid_intelligence
	
	******************************
	*** Neuroticism
	******************************
	
	rename n_20127_0_0 neuroticism
	
	******************************
	*** Risk Taking
	******************************
	
	* Negative codes are treated as missing before the rename.
	replace n_2040_0_0 = .	if n_2040_0_0 < 0
	rename n_2040_0_0 risk_taking

	******************************
	*** Smoking
	******************************
	
	rename n_20160_0_0 ever_smoked
	
	* 1 for code 2 of n_20116_0_0, 0 for codes 0 and 1, missing otherwise.
	* Code 2 also includes those who only smoke occasionally.
	gen currently_smoking = (n_20116_0_0 == 2)	if inlist(n_20116_0_0, 0, 1, 2)
		
	* Negative codes of the two starting-age variables are treated as missing.
	replace n_3436_0_0 = .	if n_3436_0_0 < 0
	replace n_2867_0_0 = .	if n_2867_0_0 < 0
		
	* Starting age comes from n_2867_0_0 when n_1249_0_0 is 1 and from n_3436_0_0
	* when n_1239_0_0 is 1. The second line wins if both apply.
	gen age_started_smoking = .
	replace age_started_smoking = n_2867_0_0	if n_1249_0_0 == 1 & !missing(n_2867_0_0)
	replace age_started_smoking = n_3436_0_0	if n_1239_0_0 == 1 & !missing(n_3436_0_0)
	
	* Flag: a starting age was expected (n_1249_0_0 or n_1239_0_0 equal to 1) but is missing.
	gen Mage_started_smoking = (n_1249_0_0 == 1 & missing(n_2867_0_0)) | (n_1239_0_0 == 1 & missing(n_3436_0_0))
		
	* 1 when the starting age is 16 or below, 0 for everyone else (including people with
	* no starting age), and missing when the starting age was expected but not reported.
	gen started_smoking_age16 = (age_started_smoking <= 16)
	replace started_smoking_age16 = .	if Mage_started_smoking == 1
	
	* Among ever-smokers, the complement of currently_smoking; missing for everyone else.
	gen nolonger_smoking = 1 - currently_smoking	if ever_smoked == 1
	
	******************************
	*** Obesity
	******************************

	* obese2: BMI2 of 30 or more.
	rename n_21001_0_0 BMI2
	gen obese2 = (BMI2 >= 30)	if BMI2 != .

	***************************************
	*** STANDARDIZING SOME VARIABLES
	***************************************

	* Each variable is replaced by its z-score. The mean and standard deviation are taken
	* over observations with both score and parentsscore present, and then applied to every row.
	tempname mu sigma
	foreach v in reaction_time visual_memory neuroticism fluid_intelligence N_accelerometer N_glucose N_GGT {
		summarize `v' if !missing(score, parentsscore)
		scalar `mu'    = r(mean)
		scalar `sigma' = r(sd)
		replace `v' = (`v' - `mu') / `sigma'
	}	
	
	*** standardizing testosterone separately by gender	
	* Same reference sample, with mean and sd computed within each value of male (0 first, then 1).
	gen N_testosterone = .
	forvalues sex = 0/1 {
		summarize testosterone if male == `sex' & !missing(score, parentsscore)
		replace N_testosterone = (testosterone - r(mean)) / r(sd)	if male == `sex' & !missing(testosterone)
	}
		
	
	save "$directory/Full_Sample", replace	
	
	***************************************
	* 	   		Data from Parents 	 	  *
	***************************************
	
	use "$directory/Linking_Children_to_Their_Parents.dta", clear 

	* Characteristics pulled from each parent record in Full_Sample.
	local parentvars YoS SLA noqual CSE Olevel Alevel college Inc_less_18k Inc_more_18k Inc_more_31k Inc_more_52k Inc_more_100k compulsory

	* For each parent type, build a file keyed on the child id (stored as n_eid) that
	* holds the parent characteristics with the parent type appended to each name.
	foreach parent in father mother {
				
		preserve
		
			keep `parent'_id child_id
			
			* Look the parent up in Full_Sample by id; only links with a matching record survive.
			rename `parent'_id n_eid
			merge m:1 n_eid using "$directory/Full_Sample", keep(3) nogenerate keepusing(n_eid `parentvars')
			drop n_eid
		
			foreach v of local parentvars {
				rename `v' `v'_`parent'
			}
			
			* Re-key the file on the child so it can be merged onto the child record.
			rename child_id n_eid
			
			save "$directory/`parent'", replace
		
		restore
		
	}	
		
	***************************************
	* 	   Merging Data on Parents 	  *
	***************************************
	
	use "$directory/Full_Sample", clear
	
	* Attach the father and mother files by id; unmatched rows from those files are dropped.
	foreach parent in father mother {
		merge 1:1 n_eid using "$directory/`parent'", keep(1 3) nogenerate
	}
				
			
	* Parent-level value: the average when both parents are available, the single
	* available parent otherwise, and missing when neither is available.
	foreach v in YoS SLA noqual CSE Olevel Alevel college Inc_less_18k Inc_more_18k Inc_more_31k Inc_more_52k Inc_more_100k compulsory {
		local dad `v'_father
		local mum `v'_mother
		quietly generate `v'_parent = .
		replace `v'_parent = (`dad' + `mum')/2	if !missing(`dad') & !missing(`mum')
		replace `v'_parent = `dad'				if !missing(`dad') &  missing(`mum')
		replace `v'_parent = `mum'				if  missing(`dad') & !missing(`mum')
	}		
	
	
	* Three subsamples are written out in turn. Each is cut from the full data inside
	* preserve and restore, so the data in memory are unchanged afterwards.
	
	* Parents sample: both score and parentsscore present.
	preserve
		keep if !missing(score, parentsscore)
		save "$directory/Parents_Sample", replace		
	restore
	
	* Siblings sample: score present and a sibling-set identifier
	* (variable "fam" identifies set of siblings who participated in UKB).
	preserve
		keep if !missing(score, fam)
		save "$directory/Siblings_Sample", replace		
	restore

	* Two-generations sample: score present and at least one of the
	* parent-level schooling, leaving-age or income measures.
	preserve
		keep if !missing(score) & (!missing(YoS_parent) | !missing(SLA_parent) | !missing(Inc_less_18k_parent))
		save "$directory/Two_Generations_Sample", replace		
	restore
	
	* ROSLA sample, built inside preserve and restore so that the renames and
	* sample restrictions below do not carry over to the data in memory.
	preserve
		
		**************************
		*** 	Date of birth
		**************************

		rename n_52_0_0  month_birth
		rename ts_33_0_0 date_birth
		
		* Running variable: days from 1 Sep 1957, so that DoB is 0 on that date.
		gen DoB = date_birth - td(01sep1957)

		**************************
		***    Country of birth
		**************************

		rename n_1647_0_0 country_of_birth
		
		**************************
		***  Year of Immigration
		**************************
		
		rename n_34_0_0   year_birth
		rename n_3659_0_0 year_immigration
		
		* Negative codes are treated as missing before computing age at immigration.
		replace year_immigration = .	if year_immigration < 0
		gen age_immigration = year_immigration - year_birth
				
		***************************************
		* Restricting to obs within bandwidth *
		***************************************
		
		* Birth dates from 1 Sep 1947 to 31 Aug 1967 inclusive; missing dates are dropped.
		keep if inrange(date_birth, td(01sep1947), td(31aug1967))
		
		***************************************
		* Restricting to those born in England*
		* 		    Scotland or Wales	      *
		*   or who immigrated by age 14       *
		***************************************
		
		keep if inlist(country_of_birth, 1, 2, 3) | age_immigration <= 14
		 
		***************************************
		* Drop if school leaving age missing  *
		***************************************
		
		drop if missing(SLA)
		 
		******************************
		*** Post and post-trend
		******************************

		* after marks birth on or after the cutoff; the products below are its
		* interactions with the running variable and with the two scores.
		gen after    = (DoB >= 0)
		gen DoBafter = after * DoB
		
		gen score_after        = after * score
		gen parentsscore_after = after * parentsscore

		* Quadratic term in the running variable and its interaction with after.
		gen DoB2      = DoB * DoB
		gen DoB2after = DoB2 * after

		***************************************
		* 		Triangular weights 			  *
		***************************************
		
		* Weight declines by one per day of distance from the cutoff, with one
		* extra unit removed on the after side, and is floored at 0.
		gen w10 = max(0, 3652 - abs(DoB) - after)
	
		
		save "$directory/ROSLA_Sample", replace		

	restore	
		
	***************************************
	***************************************
